### -------------------------------------------------- 
### ---- General Notes ---- 
### -------------------------------------------------- 

# [Add Notes]


### -------------------------------------------------- 
### ---- General Set Up ---- 
### -------------------------------------------------- 

### Clear global environment
# Removes every object visible in the workspace the script is run from, so the
# script starts without leftovers from earlier sessions. This does NOT detach
# packages or reset options().
rm(list=ls()) 


### Libraries:
# pacman::p_load() attaches each listed package (installing it first if it is
# missing). NOTE: magrittr itself is not in this list, so only the `%>%` pipe
# re-exported by the tidyverse packages is attached; magrittr-only operators
# such as `%<>%` are not available unless magrittr is loaded as well.
library(pacman)
p_load(tidyverse, foreign, readstata13, readr, here)


### File Location (R project only)
# Declare where this script lives relative to the project root, then store the
# project root so that all paths below can be built from it.
here::i_am("Survey Only Match/103_match_tfa_to_matregion.R")
file_path <- here()


### -------------------------------------------------- 
### ---- Load TFA Data ----
### -------------------------------------------------- 

### Read the TFA Stata file from the project's Data folder into a data frame
tfa_dat <- read.dta13(paste0(file_path,"/Data/tfa_data.dta"))

### -------------------------------------------------- 
### ---- matriculatedregion to State ----
### -------------------------------------------------- 

### One row per distinct matriculatedregion value
mr_unique <- distinct(tfa_dat, matriculatedregion)

### Guess a state abbreviation for every region whose label contains a
### state name.
# Matching is case-sensitive and uses each state name as a pattern. States are
# visited in the order of `state.name`, so when several names match the same
# label the later one wins (e.g. "West Virginia" overrides "Virginia").
# Regions that contain no state name stay NA.
data(state)
region_labels <- mr_unique[, 1]
est_state_all <- rep(NA, length(region_labels))
for (state_idx in seq_along(state.name)) {
  label_has_state <- grepl(state.name[state_idx], region_labels)
  est_state_all[label_has_state] <- state.abb[state_idx]
}

### Identify remaining matriculatedregion values
# Attach the guessed state abbreviation to each unique region label.
mr_unique_state <- dplyr::mutate(
  select(mr_unique, matriculatedregion),
  state_mr = est_state_all
)

# Show the regions that did not match any state name; these need a manual
# assignment.
print(filter(mr_unique_state, is.na(state_mr)))

# Reference table of state abbreviations and names (handy when filling in the
# manual assignments)
data(state)
cbind(state.abb,state.name)

### Assign remaining matriculatedregion values
# Manual region -> state lookup for labels that are not (or not reliably)
# resolved by the state-name match. An entry here always overrides whatever
# the automatic match produced for that exact label.
manual_region_state <- c(
  "Memphis"             = "TN",
  "Twin Cities"         = "MN",
  "Baltimore"           = "MD",
  "D.C. Region"         = "DC",
  "Colorado"            = "CO",
  "Bay Area"            = "CA",
  "Houston"             = "TX",
  "Metro Atlanta"       = "GA",
  "Philadelphia"        = "PA",
  "St. Louis"           = "MO",
  "Phoenix"             = "AZ",
  "Charlotte"           = "NC",
  "Las Vegas Valley"    = "NV",
  "Hawai'i"             = "HI",
  "Miami-Dade"          = "FL",
  "Los Angeles"         = "CA",
  "Chicago"             = "IL",
  "Rio Grande Valley"   = "TX",
  "Milwaukee"           = "WI",
  "Jacksonville"        = "FL",
  "Dallas - Fort Worth" = "TX",
  "Greater Nashville"   = "TN",
  "San Antonio"         = "TX",
  "Detroit"             = "MI",
  "Sacramento"          = "CA",
  "Appalachia"          = NA_character_, # Note: Appalachia covers multiple states
  "San Diego"           = "CA",
  "Orlando"             = "FL",
  "Buffalo"             = "NY"
)

# Plain assignment with `%>%` is used instead of magrittr's `%<>%`: magrittr is
# not attached by this script, so `%<>%` would fail with
# "could not find function".
mr_unique_state <- mr_unique_state %>%
  mutate(
    state_mr = ifelse(
      # as.character() keeps the lookup working if the column is a factor
      as.character(matriculatedregion) %in% names(manual_region_state),
      unname(manual_region_state[as.character(matriculatedregion)]),
      state_mr
    )
  )

print(mr_unique_state)

### Match to TFA individuals
# Every row of tfa_dat is kept; state_mr is looked up through the region label
# (mr_unique_state holds one row per distinct matriculatedregion).
tfa_dat_matregmatch <- tfa_dat %>%
  left_join(mr_unique_state, by = "matriculatedregion")


### -------------------------------------------------- 
### ---- Save Matched matriculatedregion Data ---- 
### -------------------------------------------------- 
# NOTE(review): the previous header also mentioned PhysicalZipCode, but no
# zip-code matching is visible in this script -- confirm whether that step
# belongs here.

# Final object: the TFA data with the matched state_mr column added.
tfa_dat_matregmatch_FINAL <- tfa_dat_matregmatch

# Write the final object to the Temp_Data folder as an .RData file.
save(
  list = "tfa_dat_matregmatch_FINAL",
  file = paste0(file_path,"/Survey Only Match/Temp_Data/tfa_to_state_matreg.RData")
)


